Workflow

The plague-phylogeography Snakemake pipeline.

Click the nodes to obtain details about each step.

Alignment

Snippy Multi

Snippy Pairwise

Logs

Alignment

Phylogeny

Metadata

General

Phylogenetics

IQTREE

Post-Alignment

Qualimap

Quality Control

MultiQC

Statistics

If the workflow was executed on a cluster or in the cloud, the reported runtimes include the time spent waiting in the queue.

Configuration

Configuration files
File Code
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Snakemake Configuration File

# Conda Environments
#conda_eager_env : "nf-core-eager-2.2.0dev"

# SQLITE Parameters
# The database file name plus the SELECT statements used to pick records.
# NOTE: every multi-line query below is a YAML plain (unquoted) scalar, so the
# parser folds its line breaks and indentation into single spaces and the value
# is loaded as one single-line SQL string. Do not put comments inside a query.
sqlite_db : "yersinia_pestis_db.sqlite"
# Assembly query: returns AssemblyFTPGenbank for BioSample rows whose comment
# matches '%KEEP%Assembly%Modern%' and whose AssemblyFTPGenbank is non-empty.
sqlite_select_command_asm : SELECT
                               AssemblyFTPGenbank
                             FROM
                               BioSample
                               LEFT JOIN Assembly
                                 ON AssemblyBioSampleAccession = BioSampleAccession
                                 WHERE (BioSampleComment LIKE '%KEEP%Assembly%Modern%' AND length(AssemblyFTPGenbank) > 0)
# Disabled alternative SRA query: restricts the selection to two specific run
# accessions instead of filtering on the BioSample comment.
#sqlite_select_command_sra : SELECT
#                               BioSampleAccession,
#                               SRARunAccession
#                             FROM
#                               BioSample
#                               LEFT JOIN SRA
#                                 ON SRABioSampleAccession = BioSampleAccession
#                                 WHERE (SRARunAccession = 'SRR1048902' OR SRARunAccession = 'SRR1048905')
# SRA query: returns BioSampleAccession and SRARunAccession for BioSample rows
# whose comment matches '%KEEP%SRA%Ancient%'.
sqlite_select_command_sra : SELECT
                              BioSampleAccession,
                              SRARunAccession
                            FROM
                              BioSample
                              LEFT JOIN SRA
                                ON SRABioSampleAccession = BioSampleAccession
                                WHERE (BioSampleComment LIKE '%KEEP%SRA%Ancient%')
# Reference query: returns AssemblyFTPGenbank for BioSample rows whose comment
# matches '%Assembly%Modern%Reference%'. Unlike the assembly query above, the
# pattern does not require 'KEEP' and there is no non-empty filter on the
# returned column.
sqlite_select_command_ref : SELECT
                               AssemblyFTPGenbank
                             FROM
                               BioSample
                               LEFT JOIN Assembly
                                 ON AssemblyBioSampleAccession = BioSampleAccession
                                 WHERE (BioSampleComment LIKE '%Assembly%Modern%Reference%')
# NOTE(review): presumably upper limits on how many assembly / SRA datasets
# are used — confirm against the rules that read these keys.
max_datasets_assembly : 600
max_datasets_sra : 200
# List of read-origin labels.
# NOTE(review): "assembly" and "sra" mirror the two query types above; "local"
# presumably refers to user-supplied data — confirm.
reads_origin :
  - "assembly"
  - "sra"
  - "local"

# misc filtering
# NOTE(review): presumably the identity threshold and minimum length used when
# detecting repeats — confirm the units in the rule that consumes them.
detect_repeats_threshold : 90
detect_repeats_length : 50

# Reference locus: identifier, name and coordinate range.
# start/end are quoted, so YAML loads them as strings rather than integers.
# NOTE(review): the identifier looks like a sequence accession — confirm.
reference_locus : "AL590842"
reference_locus_name : "chromosome"
reference_locus_start : "0"
reference_locus_end : "4653728"

# Eager parameters
# NOTE(review): eager_rev looks like the nf-core/eager pipeline revision, and
# eager_bwaalnn / eager_bwaalnl presumably map to the bwa aln -n / -l options —
# confirm against the rule that builds the eager command line.
eager_rev: "2.2.1"
eager_clip_readlength : 35
eager_bwaalnn : 0.01
eager_bwaalnl : 16
organism : "Yersinia pestis"

# Snippy Parameters
# NOTE(review): the depth / quality / fraction values are presumably passed to
# snippy as filtering thresholds — confirm which flag each key maps to.
snippy_ctg_depth : 10
snippy_bam_depth : 3
snippy_base_qual : 20
snippy_map_qual : 30
snippy_min_frac : 0.9
snippy_mask_char : "X"
# NOTE(review): presumably a missing-data threshold and a SNP-density window
# used for site filtering — confirm units.
snippy_missing_data : 5
snippy_snp_density : 10
# Singleton handling: set to an empty string to remove singletons, or use the
# commented-out value below to keep them.
#snippy_keep_singleton : "--keep-singleton"
snippy_keep_singleton: ""
# List of the integers 0 through 10.
# NOTE(review): presumably the missing-data thresholds to plot — confirm.
snippy_multi_plot_missing_data:
  - 0
  - 1
  - 2
  - 3
  - 4
  - 5
  - 6
  - 7
  - 8
  - 9
  - 10

# IQTREE
# The model value includes the "-m" flag itself. The commented-out alternative
# uses "-m MFP" instead of a fixed model.
#iqtree_model: "-m MFP"
iqtree_model : "-m K3Pu+F+I"
iqtree_seed : "47321424" # keeping it consistent in a config file allows for checkpointing
# Outgroup: a comma-separated list of names in one string. The commented-out
# lines are earlier alternatives.
# NOTE(review): names presumably must match sample names in the alignment — confirm.
#iqtree_outgroup : "Reference"
#iqtree_outgroup : "GCA_000323485.1_ASM32348v1_genomic,GCA_000323845.1_ASM32384v1_genomic"
iqtree_outgroup : "SAMEA3541826,SAMEA3541827"
# Additional command-line options, given as a single string.
iqtree_other : "--ufboot 1000 --alrt 1000"
# NOTE(review): presumably the number of independent IQ-TREE runs — confirm.
iqtree_runs : 10

Loading...